{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "view-in-github"
   },
   "source": [
    "<a href=\"https://colab.research.google.com/github/TeleStats/PA22_replication/blob/main/PA22_replication.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Qt2f0jcP_t5s"
   },
   "source": [
    "# Replication package for PA2022 submission \"Face detection, tracking, and classification from large-scale news archives for analysis of key political figures\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "PjIyCuwNID_l"
   },
   "source": [
    "#### Download face detection, face features, and ground truth annotations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "tItJ2eq2HW4W"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2023-05-24 05:53:19--  http://www.satoh-lab.nii.ac.jp/member/agirbau/telestats/files/data.tar.gz\n",
      "Resolving www.satoh-lab.nii.ac.jp (www.satoh-lab.nii.ac.jp)... 49.212.198.177\n",
      "Connecting to www.satoh-lab.nii.ac.jp (www.satoh-lab.nii.ac.jp)|49.212.198.177|:80... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 846103431 (807M) [application/x-gzip]\n",
      "Saving to: ‘data.tar.gz’\n",
      "\n",
      "data.tar.gz         100%[===================>] 806.91M  2.51MB/s    in 4m 4s   \n",
      "\n",
      "2023-05-24 05:57:23 (3.31 MB/s) - ‘data.tar.gz’ saved [846103431/846103431]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Download and prepare the data folder\n",
    "# Takes around 4 minutes\n",
    "# The archive contains:\n",
    "# - the embeddings corresponding to the individuals' models\n",
    "# - precomputed detections and face feature embeddings\n",
    "# - ground truth data for evaluation\n",
    "!wget --no-check-certificate www.satoh-lab.nii.ac.jp/member/agirbau/telestats/files/data.tar.gz\n",
    "!tar -xf data.tar.gz data\n",
    "!rm data.tar.gz"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "QN_e1fwYuMtI"
   },
   "source": [
    "## Run the tracking + classification part of the method\n",
    "#### We already provide detections and face embeddings (previously downloaded). Run tracking and classification part to assign an ID to each detected face.\n",
    "###### Please run this code for all the different configurations, as done in the paper.\n",
    "######  e.g. channel = \"CNNW\", detector = \"yolo\", classifier = \"fcg_average_vote\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "3liqxCIxO8H3"
   },
   "outputs": [],
   "source": [
    "#### Automatically run the face tracking and classification for all channels, detectors, and classifiers ####\n",
    "channel_list = [\"CNNW\", \"FOXNEWSW\", \"MSNBCW\", \"news7-lv\", \"hodost-lv\"]  # news7-lv (NHK), hodost-lv (HODO Station), CNNW (CNN), FOXNEWSW (FOX), MSNBCW (MSNBC)\n",
    "detector_list = [\"dfsd\", \"mtcnn\", \"yolo\"]\n",
    "feats_list = [\"resnetv1\"]  # resnetv1 (Inception-resnet as backbone)\n",
    "classifier_list = [\"knn_3\", \"fcg_average_centroid\", \"fcg_average_vote\", \"fcgNT_average_vote\"]  # knn_3, fcg_average_centroid, fcg_average_vote, fcgNT_average_vote (for \"No Tracking\" experiment in Table 6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "id": "Z2CSAk7_I6es"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mode:train  Channel:CNNW  Detector:dfsd  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 2.0\n",
      "Mode:train  Channel:CNNW  Detector:dfsd  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 2.1\n",
      "Mode:train  Channel:CNNW  Detector:dfsd  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 3.2\n",
      "Mode:train  Channel:CNNW  Detector:mtcnn  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 2.0\n",
      "Mode:train  Channel:CNNW  Detector:mtcnn  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 2.1\n",
      "Mode:train  Channel:CNNW  Detector:mtcnn  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 3.1\n",
      "Mode:train  Channel:CNNW  Detector:yolo  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 2.1\n",
      "Mode:train  Channel:CNNW  Detector:yolo  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 2.1\n",
      "Mode:train  Channel:CNNW  Detector:yolo  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 3.3\n",
      "Mode:train  Channel:FOXNEWSW  Detector:dfsd  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 2.8\n",
      "Mode:train  Channel:FOXNEWSW  Detector:dfsd  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 2.9\n",
      "Mode:train  Channel:FOXNEWSW  Detector:dfsd  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 4.2\n",
      "Mode:train  Channel:FOXNEWSW  Detector:mtcnn  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 2.7\n",
      "Mode:train  Channel:FOXNEWSW  Detector:mtcnn  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 2.8\n",
      "Mode:train  Channel:FOXNEWSW  Detector:mtcnn  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 4.0\n",
      "Mode:train  Channel:FOXNEWSW  Detector:yolo  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 2.8\n",
      "Mode:train  Channel:FOXNEWSW  Detector:yolo  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 2.9\n",
      "Mode:train  Channel:FOXNEWSW  Detector:yolo  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 4.3\n",
      "Mode:train  Channel:MSNBCW  Detector:dfsd  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 3.4\n",
      "Mode:train  Channel:MSNBCW  Detector:dfsd  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 3.5\n",
      "Mode:train  Channel:MSNBCW  Detector:dfsd  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 5.1\n",
      "Mode:train  Channel:MSNBCW  Detector:mtcnn  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 3.4\n",
      "Mode:train  Channel:MSNBCW  Detector:mtcnn  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 3.4\n",
      "Mode:train  Channel:MSNBCW  Detector:mtcnn  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 4.9\n",
      "Mode:train  Channel:MSNBCW  Detector:yolo  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 3.5\n",
      "Mode:train  Channel:MSNBCW  Detector:yolo  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 3.6\n",
      "Mode:train  Channel:MSNBCW  Detector:yolo  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 5.3\n",
      "Mode:train  Channel:news7-lv  Detector:dfsd  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 16.7\n",
      "Mode:train  Channel:news7-lv  Detector:dfsd  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 19.7\n",
      "Mode:train  Channel:news7-lv  Detector:dfsd  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 56.5\n",
      "Mode:train  Channel:news7-lv  Detector:mtcnn  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 16.0\n",
      "Mode:train  Channel:news7-lv  Detector:mtcnn  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 18.3\n",
      "Mode:train  Channel:news7-lv  Detector:mtcnn  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 52.8\n",
      "Mode:train  Channel:news7-lv  Detector:yolo  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 19.6\n",
      "Mode:train  Channel:news7-lv  Detector:yolo  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 23.1\n",
      "Mode:train  Channel:news7-lv  Detector:yolo  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 68.0\n",
      "Mode:train  Channel:hodost-lv  Detector:dfsd  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 17.3\n",
      "Mode:train  Channel:hodost-lv  Detector:dfsd  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 20.4\n",
      "Mode:train  Channel:hodost-lv  Detector:dfsd  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 58.4\n",
      "Mode:train  Channel:hodost-lv  Detector:mtcnn  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 16.6\n",
      "Mode:train  Channel:hodost-lv  Detector:mtcnn  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 19.3\n",
      "Mode:train  Channel:hodost-lv  Detector:mtcnn  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 54.5\n",
      "Mode:train  Channel:hodost-lv  Detector:yolo  Features:resnetv1  Classifier:fcg_average_centroid\n",
      "Elapsed time: 18.8\n",
      "Mode:train  Channel:hodost-lv  Detector:yolo  Features:resnetv1  Classifier:fcg_average_vote\n",
      "Elapsed time: 22.3\n",
      "Mode:train  Channel:hodost-lv  Detector:yolo  Features:resnetv1  Classifier:fcgNT_average_vote\n",
      "Elapsed time: 64.6\n"
     ]
    }
   ],
   "source": [
    "# Run classification for all the different options\n",
    "# Takes around 1 hour to execute all combinations\n",
    "for channel in channel_list:\n",
    "    models_path = \"faces_politicians\" if channel in ['news7-lv', 'hodost-lv'] else \"faces_us_individuals\"\n",
    "    for detector in detector_list:\n",
    "        for feats in feats_list:\n",
    "            for classifier in classifier_list:\n",
    "                !python src/face_classifier.py train \"$channel\" --models_path data/\"$models_path\" --detector \"$detector\" --feats \"$feats\" --mod_feat \"$classifier\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ayODzU-is3mp"
   },
   "source": [
    "## Replication of Tables 3-6\n",
    "#### Method performance for different channel-detector-classifier triplets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "yIbzBBGEIvHe"
   },
   "source": [
    "#### Configuration\n",
    "###### **channel** --> news7-lv (NHK), hodost-lv (HODO Station), CNNW (CNN), FOXNEWSW (FOX), MSNBCW (MSNBC)\n",
    "###### **detector** --> dfsd, mtcnn, yolo\n",
    "###### **classifier** --> knn_3 (KNN), fcg_average_centroid (Centroid), fcg_average_vote (Vote), fcgNT_average_vote (for \"No Tracking\" in Table 6)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###### Run this to reproduce Tables 3-6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "id": "ZozotZOtrTWF"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CNN-DFSD: Missed detections=4.62%\n",
      "CNN-MTCNN: Missed detections=5.36%\n",
      "CNN-YOLO: Missed detections=1.11%\n",
      "FOX-DFSD: Missed detections=3.88%\n",
      "FOX-MTCNN: Missed detections=4.6%\n",
      "FOX-YOLO: Missed detections=0.96%\n",
      "MSNBC-DFSD: Missed detections=5.16%\n",
      "MSNBC-MTCNN: Missed detections=6.36%\n",
      "MSNBC-YOLO: Missed detections=1.01%\n",
      "NHK-DFSD: Missed detections=3.13%\n",
      "NHK-MTCNN: Missed detections=2.84%\n",
      "NHK-YOLO: Missed detections=0.65%\n",
      "HODO-DFSD: Missed detections=6.28%\n",
      "HODO-MTCNN: Missed detections=8.55%\n",
      "HODO-YOLO: Missed detections=0.33%\n",
      "Elapsed: 192.77262353897095 seconds\n"
     ]
    }
   ],
   "source": [
    "# Table 3\n",
    "# Amount of missed detections for the specified detector\n",
    "# Takes around 3-4 minutes to execute all combinations\n",
    "import time\n",
    "start_time = time.time()\n",
    "for channel in channel_list:\n",
    "    models_path = \"faces_politicians\" if channel in ['news7-lv', 'hodost-lv'] else \"faces_us_individuals\"\n",
    "    for detector in detector_list:\n",
    "        !python src/metrics.py train \"$channel\" --models_path data/\"$models_path\" --detector \"$detector\" --use_dets\n",
    "end_time = time.time()\n",
    "print(f\"Elapsed: {end_time - start_time} seconds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "umTRZmeC05aE"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CNN-DFSD-KNN: P=1.0, R=0.63, F1=0.773\n",
      "CNN-MTCNN-KNN: P=1.0, R=0.64, F1=0.779\n",
      "CNN-YOLO-KNN: P=1.0, R=0.69, F1=0.814\n",
      "CNN-DFSD-centroid: P=1.0, R=0.74, F1=0.85\n",
      "CNN-MTCNN-centroid: P=1.0, R=0.73, F1=0.844\n",
      "CNN-YOLO-centroid: P=1.0, R=0.8, F1=0.891\n",
      "CNN-DFSD-vote: P=1.0, R=0.78, F1=0.875\n",
      "CNN-MTCNN-vote: P=1.0, R=0.77, F1=0.869\n",
      "CNN-YOLO-vote: P=1.0, R=0.82, F1=0.902\n",
      "CNN-DFSD-vote (no tracking): P=1.0, R=0.65, F1=0.787\n",
      "CNN-MTCNN-vote (no tracking): P=1.0, R=0.65, F1=0.787\n",
      "CNN-YOLO-vote (no tracking): P=1.0, R=0.7, F1=0.827\n",
      "FOX-DFSD-KNN: P=1.0, R=0.66, F1=0.796\n",
      "FOX-MTCNN-KNN: P=1.0, R=0.64, F1=0.782\n",
      "FOX-YOLO-KNN: P=1.0, R=0.7, F1=0.822\n",
      "FOX-DFSD-centroid: P=1.0, R=0.76, F1=0.86\n",
      "FOX-MTCNN-centroid: P=1.0, R=0.74, F1=0.852\n",
      "FOX-YOLO-centroid: P=1.0, R=0.79, F1=0.883\n",
      "FOX-DFSD-vote: P=1.0, R=0.77, F1=0.867\n",
      "FOX-MTCNN-vote: P=1.0, R=0.75, F1=0.858\n",
      "FOX-YOLO-vote: P=1.0, R=0.8, F1=0.889\n",
      "FOX-DFSD-vote (no tracking): P=1.0, R=0.67, F1=0.804\n",
      "FOX-MTCNN-vote (no tracking): P=1.0, R=0.66, F1=0.793\n",
      "FOX-YOLO-vote (no tracking): P=1.0, R=0.71, F1=0.83\n",
      "MSNBC-DFSD-KNN: P=1.0, R=0.6, F1=0.75\n",
      "MSNBC-MTCNN-KNN: P=1.0, R=0.58, F1=0.732\n",
      "MSNBC-YOLO-KNN: P=1.0, R=0.65, F1=0.786\n",
      "MSNBC-DFSD-centroid: P=1.0, R=0.72, F1=0.837\n",
      "MSNBC-MTCNN-centroid: P=1.0, R=0.66, F1=0.793\n",
      "MSNBC-YOLO-centroid: P=1.0, R=0.73, F1=0.846\n",
      "MSNBC-DFSD-vote: P=1.0, R=0.73, F1=0.844\n",
      "MSNBC-MTCNN-vote: P=1.0, R=0.69, F1=0.814\n",
      "MSNBC-YOLO-vote: P=1.0, R=0.76, F1=0.865\n",
      "MSNBC-DFSD-vote (no tracking): P=1.0, R=0.61, F1=0.761\n",
      "MSNBC-MTCNN-vote (no tracking): P=1.0, R=0.59, F1=0.744\n",
      "MSNBC-YOLO-vote (no tracking): P=1.0, R=0.66, F1=0.797\n",
      "NHK-DFSD-KNN: P=0.81, R=0.75, F1=0.776\n",
      "NHK-MTCNN-KNN: P=0.79, R=0.74, F1=0.766\n",
      "NHK-YOLO-KNN: P=0.81, R=0.78, F1=0.793\n",
      "NHK-DFSD-centroid: P=0.77, R=0.79, F1=0.78\n",
      "NHK-MTCNN-centroid: P=0.79, R=0.82, F1=0.806\n",
      "NHK-YOLO-centroid: P=0.79, R=0.84, F1=0.816\n",
      "NHK-DFSD-vote: P=0.82, R=0.78, F1=0.799\n",
      "NHK-MTCNN-vote: P=0.86, R=0.78, F1=0.819\n",
      "NHK-YOLO-vote: P=0.84, R=0.81, F1=0.825\n",
      "NHK-DFSD-vote (no tracking): P=0.79, R=0.75, F1=0.769\n",
      "NHK-MTCNN-vote (no tracking): P=0.78, R=0.75, F1=0.765\n",
      "NHK-YOLO-vote (no tracking): P=0.8, R=0.78, F1=0.791\n",
      "HODO-DFSD-KNN: P=0.8, R=0.63, F1=0.701\n",
      "HODO-MTCNN-KNN: P=0.77, R=0.63, F1=0.696\n",
      "HODO-YOLO-KNN: P=0.79, R=0.66, F1=0.72\n",
      "HODO-DFSD-centroid: P=0.78, R=0.7, F1=0.741\n",
      "HODO-MTCNN-centroid: P=0.78, R=0.72, F1=0.749\n",
      "HODO-YOLO-centroid: P=0.75, R=0.75, F1=0.754\n",
      "HODO-DFSD-vote: P=0.85, R=0.7, F1=0.768\n",
      "HODO-MTCNN-vote: P=0.87, R=0.69, F1=0.768\n",
      "HODO-YOLO-vote: P=0.87, R=0.75, F1=0.804\n",
      "HODO-DFSD-vote (no tracking): P=0.78, R=0.63, F1=0.7\n",
      "HODO-MTCNN-vote (no tracking): P=0.76, R=0.64, F1=0.695\n",
      "HODO-YOLO-vote (no tracking): P=0.78, R=0.67, F1=0.722\n",
      "Elapsed: 904.4151504039764 seconds\n"
     ]
    }
   ],
   "source": [
    "# Tables 4, 5, 6\n",
    "# Run evaluation for the specified options (detector + classifier)\n",
    "# Takes around 15 minutes to execute all combinations\n",
    "# To compute the overall results per table, do the average between channels.\n",
    "# e.g. Table 5: Overall-YOLO-VOTE --> 1/3 (CNN-YOLO-VOTE + FOX-YOLO-VOTE + MSNBC-YOLO-VOTE)\n",
    "# e.g. Table 6: Overall-YOLO-VOTE --> 1/2 (NHK-YOLO-VOTE + HODO-YOLO-VOTE)\n",
    "for channel in channel_list:\n",
    "    models_path = \"faces_politicians\" if channel in ['news7-lv', 'hodost-lv'] else \"faces_us_individuals\"\n",
    "    for classifier in classifier_list:\n",
    "        for detector in detector_list:\n",
    "            for feats in feats_list:\n",
    "                !python src/metrics.py train \"$channel\" --models_path data/\"$models_path\" --detector \"$detector\" --feats \"$feats\" --mod_feat \"$classifier\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "qGhdANW1skQ6"
   },
   "source": [
    "## Replication of Figures 5-6\n",
    "#### Method performance with respect to face size\n",
    "###### The results reported in the paper for all channels are for the following configuration:\n",
    "###### **Detector**: YOLO, **Classifier**: vote"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "id": "Pa5QwBliZrMd"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Migrating database to v0.20.1\n",
      "data/dataset/train/MSNBCW\n",
      " 10 [4.0s elapsed, 2.7 samples/s]   \n",
      " 100% |█████| 1589/1589 [1.3s elapsed, 0s remaining, 1.2K samples/s]         \n",
      "data/dataset/train/news7-lv\n",
      " 9 [36.8s elapsed, 0.2 samples/s] \n",
      " 100% |███| 34636/34636 [18.2s elapsed, 0s remaining, 1.7K samples/s]       \n",
      "data/dataset/train/CNNW\n",
      " 10 [2.5s elapsed, 4.0 samples/s]   \n",
      " 100% |█████| 1083/1083 [917.4ms elapsed, 0s remaining, 1.2K samples/s]       \n",
      "data/dataset/train/hodost-lv\n",
      " 8 [30.0s elapsed, 0.3 samples/s] \n",
      " 100% |███| 32426/32426 [16.3s elapsed, 0s remaining, 2.1K samples/s]      \n",
      "data/dataset/train/FOXNEWSW\n",
      " 10 [4.1s elapsed, 2.4 samples/s]   \n",
      " 100% |█████| 1674/1674 [1.4s elapsed, 0s remaining, 1.2K samples/s]         \n",
      "Success\n"
     ]
    }
   ],
   "source": [
    "# Generate dataset with fiftyone\n",
    "# Takes around 5 minutes\n",
    "!python src/convert_dataset_to_fiftyone.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "id": "ouEYm6igap4f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "hodost-lv\n",
      " 32426 [2.2m elapsed, 269.7 samples/s]  \n",
      "MSNBCW\n",
      " 1589 [7.7s elapsed, 209.5 samples/s]   \n",
      "FOXNEWSW\n",
      " 1674 [7.7s elapsed, 237.2 samples/s]   \n",
      "CNNW\n",
      " 1083 [5.2s elapsed, 216.0 samples/s]   \n",
      "news7-lv\n",
      " 34636 [2.4m elapsed, 254.8 samples/s]  \n",
      "Success!\n"
     ]
    }
   ],
   "source": [
    "# Populate the dataset with the detections/classification of the key individuals\n",
    "# Takes around 13 minutes\n",
    "!python src/convert_results_to_fiftyone.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "id": "IRT_7sqYfDRt"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['CNNW', 'FOXNEWSW', 'MSNBCW', 'hodost-lv', 'news7-lv']\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import fiftyone as fo\n",
    "import fiftyone.zoo as foz\n",
    "import fiftyone.brain as fob\n",
    "from fiftyone import ViewField as F\n",
    "\n",
    "print(fo.list_datasets())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "id": "cqeKUDfXOdDR"
   },
   "outputs": [],
   "source": [
    "#### Configuration options ####\n",
    "channel = \"news7-lv\" # news7-lv (NHK), hodost-lv (HODO Station), CNNW (CNN), FOXNEWSW (FOX), MSNBCW (MSNBC)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "TBdO_IAPyh09"
   },
   "source": [
    "#### Run the cells below to replicate the results for Figures 5-6 of the specified dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "id": "9hT1vfowfL6O"
   },
   "outputs": [],
   "source": [
    "# Specify dataset\n",
    "dataset_orig = fo.load_dataset(channel)\n",
    "us_dataset_list = [\"CNNW\", \"FOXNEWSW\", \"MSNBCW\"]\n",
    "# Run \"evaluate_detections\" first to compute the IoU of each detection, so that detections can be thresholded by IoU for the US data evaluation\n",
    "if dataset_orig.name in us_dataset_list:\n",
    "    dataset_orig.evaluate_detections(\"yolo-resnetv1-fcg_average_vote\", \"ground_truth\", eval_key=\"eval\", classwise=False)\n",
    "\n",
    "# Years list (2000-2021; the end of range() is exclusive)\n",
    "years_list = [str(i) for i in range(2000, 2022)]\n",
    "view_analysis = dataset_orig.match(F(\"year\").is_in(years_list))\n",
    "\n",
    "# For US evaluation\n",
    "# Filter the detections based on the IoU threshold\n",
    "if dataset_orig.name in us_dataset_list:\n",
    "    view_analysis = view_analysis.filter_labels(\"yolo-resnetv1-fcg_average_vote\", F(\"eval_iou\") > 0.001).clone()\n",
    "\n",
    "# Generate different views depending on the bounding box sizes \n",
    "bbox_area = (\n",
    "    F(\"$metadata.width\") * F(\"bounding_box\")[2] *\n",
    "    F(\"$metadata.height\") * F(\"bounding_box\")[3]\n",
    ")\n",
    "# [very small, small, small-medium, medium, medium-large, large, very large]\n",
    "# Average bbox for NHK = 78x78, HODO = 52x52. US dataset around 135 x 135.\n",
    "# Smallest NHK = 3x3, HODO = 2x2. US = 35x35\n",
    "# Largest NHK = 258x258, HODO = 174x174. US = 390x390\n",
    "\n",
    "boxes_areas = list(map(int, list(np.asarray([8, 16, 32, 64, 96, 128, 156]) ** 2)))\n",
    "boxes_filter_list = []\n",
    "\n",
    "for i in range(len(boxes_areas)):\n",
    "    if i == 0:\n",
    "        # First case\n",
    "        boxes_filter = bbox_area <= boxes_areas[i]\n",
    "    else:\n",
    "        # Cases in the middle\n",
    "        boxes_filter = (bbox_area > boxes_areas[i-1]) & (bbox_area <= boxes_areas[i])\n",
    "\n",
    "    boxes_filter_list.append(boxes_filter)\n",
    "        \n",
    "# Last case\n",
    "boxes_filter_list.append(bbox_area > boxes_areas[-1])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "id": "mMBfilg8ri9R"
   },
   "outputs": [],
   "source": [
    "# Generate views that contain only the bboxes selected by each size filter\n",
    "views_list = []\n",
    "\n",
    "for box_filter in boxes_filter_list:\n",
    "#for box_filter in [small_boxes, medium_boxes]:\n",
    "    view_filtered = (\n",
    "        view_analysis\n",
    "        .filter_labels(\"ground_truth\", box_filter)\n",
    "        .filter_labels(\"yolo-resnetv1-fcg_average_vote\", box_filter)\n",
    "        .filter_labels(\"yolo-resnetv1-fcg_average_vote\", F(\"label\") != \"-1\")\n",
    "    )\n",
    "    views_list.append(view_filtered)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "id": "kthZjPayrlmh"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluating detections...\n",
      " 100% |█████████████████████| 0/0 [128.0ms elapsed, ? remaining, ? samples/s] \n",
      "Performing IoU sweep...\n",
      " 100% |█████████████████████| 0/0 [128.2ms elapsed, ? remaining, ? samples/s] \n",
      "Evaluating detections...\n",
      " 100% |█████████████████| 164/164 [1.2s elapsed, 0s remaining, 140.6 samples/s]         \n",
      "Performing IoU sweep...\n",
      " 100% |█████████████████| 164/164 [809.1ms elapsed, 0s remaining, 202.7 samples/s]      \n",
      "Evaluating detections...\n",
      " 100% |███████████████| 1224/1224 [12.9s elapsed, 0s remaining, 156.7 samples/s]      \n",
      "Performing IoU sweep...\n",
      " 100% |███████████████| 1224/1224 [7.7s elapsed, 0s remaining, 199.3 samples/s]      \n",
      "Evaluating detections...\n",
      " 100% |███████████████| 3582/3582 [22.8s elapsed, 0s remaining, 180.4 samples/s]      \n",
      "Performing IoU sweep...\n",
      " 100% |███████████████| 3582/3582 [14.4s elapsed, 0s remaining, 286.3 samples/s]      \n",
      "Evaluating detections...\n",
      " 100% |███████████████| 4951/4951 [25.9s elapsed, 0s remaining, 188.1 samples/s]      \n",
      "Performing IoU sweep...\n",
      " 100% |███████████████| 4951/4951 [16.4s elapsed, 0s remaining, 301.2 samples/s]      \n",
      "Evaluating detections...\n",
      " 100% |█████████████████| 406/406 [2.2s elapsed, 0s remaining, 198.8 samples/s]      \n",
      "Performing IoU sweep...\n",
      " 100% |█████████████████| 406/406 [1.4s elapsed, 0s remaining, 317.6 samples/s]         \n",
      "Evaluating detections...\n",
      " 100% |███████████████████| 57/57 [412.8ms elapsed, 0s remaining, 138.1 samples/s]      \n",
      "Performing IoU sweep...\n",
      " 100% |███████████████████| 57/57 [312.2ms elapsed, 0s remaining, 182.6 samples/s]      \n",
      "Evaluating detections...\n",
      " 100% |███████████████████| 11/11 [187.3ms elapsed, 0s remaining, 58.7 samples/s] \n",
      "Performing IoU sweep...\n",
      " 100% |███████████████████| 11/11 [166.6ms elapsed, 0s remaining, 66.0 samples/s] \n"
     ]
    }
   ],
   "source": [
    "# Run evaluation for the generated filtered views\n",
    "results_list = []\n",
    "if dataset_orig.name in us_dataset_list:\n",
    "    iou_threshs = [0.4, 0.45, 0.5, 0.55, 0.6]\n",
    "else:\n",
    "    iou_threshs = None\n",
    "\n",
    "for view_filtered in views_list:\n",
    "    results_filtered = view_filtered.evaluate_detections(\n",
    "        \"yolo-resnetv1-fcg_average_vote\",\n",
    "        gt_field=\"ground_truth\",\n",
    "        eval_key=\"eval\",\n",
    "        compute_mAP=True,\n",
    "        iou_threshs=iou_threshs,  # For US evaluation\n",
    "    )\n",
    "\n",
    "    results_list.append(results_filtered)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "id": "-8S9y_9ErrR6"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mAP: 0, F1: 0.0\n",
      "mAP: 11.9, F1: 0.426\n",
      "mAP: 53.3, F1: 0.707\n",
      "mAP: 65.8, F1: 0.811\n",
      "mAP: 76.2, F1: 0.959\n",
      "mAP: 88.2, F1: 0.973\n",
      "mAP: 84.8, F1: 1.0\n",
      "mAP: 86.5, F1: 1.0\n",
      "    area  box_size   map     f1\n",
      "0     64         8   0.0  0.000\n",
      "1    256        16  11.9  0.426\n",
      "2   1024        32  53.3  0.707\n",
      "3   4096        64  65.8  0.811\n",
      "4   9216        96  76.2  0.959\n",
      "5  16384       128  88.2  0.973\n",
      "6  24336       156  84.8  1.000\n",
      "7  34596       186  86.5  1.000\n"
     ]
    }
   ],
   "source": [
    "# Print numerical results\n",
    "rows_df = []\n",
    "# 186**2 is for visualization purposes only: it stands in for the open-ended last bin (boxes with area above 156**2)\n",
    "for res, box_area in zip(results_list, boxes_areas + [186**2]):\n",
    "    res_map = round((max(res.mAP(), 0) * 100), 1)\n",
    "    res_f1 = round(res.metrics()['fscore'], 3)\n",
    "    box_size = int(np.sqrt(box_area))\n",
    "    rows_df.append([box_area, box_size, res_map, res_f1])\n",
    "    print(f\"mAP: {res_map}, F1: {res_f1}\")\n",
    "\n",
    "df_res = pd.DataFrame(data=rows_df, columns=['area', 'box_size', 'map', 'f1'])\n",
    "print(df_res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "id": "neW5Qojcrtu3"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "        <script type=\"text/javascript\">\n",
       "        window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
       "        if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
       "        if (typeof require !== 'undefined') {\n",
       "        require.undef(\"plotly\");\n",
       "        requirejs.config({\n",
       "            paths: {\n",
       "                'plotly': ['https://cdn.plot.ly/plotly-2.20.0.min']\n",
       "            }\n",
       "        });\n",
       "        require(['plotly'], function(Plotly) {\n",
       "            window._Plotly = Plotly;\n",
       "        });\n",
       "        }\n",
       "        </script>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>                            <div id=\"1338206b-5e23-4502-875d-cd8052e4c04e\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                require([\"plotly\"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"1338206b-5e23-4502-875d-cd8052e4c04e\")) {                    Plotly.newPlot(                        \"1338206b-5e23-4502-875d-cd8052e4c04e\",                        [{\"hovertemplate\":\"box_size=%{x}<br>map=%{text}<extra></extra>\",\"legendgroup\":\"\",\"line\":{\"color\":\"#636efa\",\"dash\":\"solid\"},\"marker\":{\"symbol\":\"circle\"},\"mode\":\"markers+text+lines\",\"name\":\"\",\"orientation\":\"v\",\"showlegend\":false,\"text\":[0.0,11.9,53.3,65.8,76.2,88.2,84.8,86.5],\"x\":[8,16,32,64,96,128,156,186],\"xaxis\":\"x\",\"y\":[0.0,11.9,53.3,65.8,76.2,88.2,84.8,86.5],\"yaxis\":\"y\",\"type\":\"scatter\",\"textposition\":\"bottom right\"}],                        
{\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\
"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"t
icks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,
\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0],\"title\":{\"text\":\"Bounding 
box size\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"title\":{\"text\":\"mAP\"}},\"legend\":{\"tracegroupgap\":0},\"title\":{\"text\":\"mAP per bounding box size for news7-lv\"}},                        {\"responsive\": true}                    ).then(function(){\n",
       "                            \n",
       "var gd = document.getElementById('1338206b-5e23-4502-875d-cd8052e4c04e');\n",
       "var x = new MutationObserver(function (mutations, observer) {{\n",
       "        var display = window.getComputedStyle(gd).display;\n",
       "        if (!display || display === 'none') {{\n",
       "            console.log([gd, 'removed!']);\n",
       "            Plotly.purge(gd);\n",
       "            observer.disconnect();\n",
       "        }}\n",
       "}});\n",
       "\n",
       "// Listen for the removal of the full notebook cells\n",
       "var notebookContainer = gd.closest('#notebook-container');\n",
       "if (notebookContainer) {{\n",
       "    x.observe(notebookContainer, {childList: true});\n",
       "}}\n",
       "\n",
       "// Listen for the clearing of the current output cell\n",
       "var outputEl = gd.closest('.output');\n",
       "if (outputEl) {{\n",
       "    x.observe(outputEl, {childList: true});\n",
       "}}\n",
       "\n",
       "                        })                };                });            </script>        </div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>                            <div id=\"5b206823-9157-426a-9713-758684f5c807\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                require([\"plotly\"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"5b206823-9157-426a-9713-758684f5c807\")) {                    Plotly.newPlot(                        \"5b206823-9157-426a-9713-758684f5c807\",                        [{\"hovertemplate\":\"box_size=%{x}<br>f1=%{text}<extra></extra>\",\"legendgroup\":\"\",\"line\":{\"color\":\"#636efa\",\"dash\":\"solid\"},\"marker\":{\"symbol\":\"circle\"},\"mode\":\"markers+text+lines\",\"name\":\"\",\"orientation\":\"v\",\"showlegend\":false,\"text\":[0.0,0.426,0.707,0.811,0.959,0.973,1.0,1.0],\"x\":[8,16,32,64,96,128,156,186],\"xaxis\":\"x\",\"y\":[0.0,0.426,0.707,0.811,0.959,0.973,1.0,1.0],\"yaxis\":\"y\",\"type\":\"scatter\",\"textposition\":\"bottom right\"}],                        
{\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\
"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"t
icks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,
\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0],\"title\":{\"text\":\"Bounding 
box size\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"title\":{\"text\":\"F-score\"}},\"legend\":{\"tracegroupgap\":0},\"title\":{\"text\":\"F-score per bounding box size for news7-lv\"}},                        {\"responsive\": true}                    ).then(function(){\n",
       "                            \n",
       "var gd = document.getElementById('5b206823-9157-426a-9713-758684f5c807');\n",
       "var x = new MutationObserver(function (mutations, observer) {{\n",
       "        var display = window.getComputedStyle(gd).display;\n",
       "        if (!display || display === 'none') {{\n",
       "            console.log([gd, 'removed!']);\n",
       "            Plotly.purge(gd);\n",
       "            observer.disconnect();\n",
       "        }}\n",
       "}});\n",
       "\n",
       "// Listen for the removal of the full notebook cells\n",
       "var notebookContainer = gd.closest('#notebook-container');\n",
       "if (notebookContainer) {{\n",
       "    x.observe(notebookContainer, {childList: true});\n",
       "}}\n",
       "\n",
       "// Listen for the clearing of the current output cell\n",
       "var outputEl = gd.closest('.output');\n",
       "if (outputEl) {{\n",
       "    x.observe(outputEl, {childList: true});\n",
       "}}\n",
       "\n",
       "                        })                };                });            </script>        </div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot mAP and F1 score for different face sizes.\n",
    "# Note that figures 5-6 in the paper are post-processed to be the superposition of the channels.\n",
    "%matplotlib inline\n",
    "import os\n",
    "\n",
    "import plotly.express as px\n",
    "import plotly.io as pio\n",
    "\n",
    "pio.renderers.default = \"notebook_connected\"\n",
    "\n",
    "\n",
    "def plot_metric_per_box_size(df, metric, label, dataset_name, out_dir=\"/results\"):\n",
    "    \"\"\"Plot one metric against the bounding box size, save it as a PDF and return the figure.\n",
    "\n",
    "    Args:\n",
    "        df: table holding a `box_size` column and the `metric` column.\n",
    "        metric: column to plot; also used for the point labels and in the output file name.\n",
    "        label: display name of the metric, used in the figure title and as the y-axis title.\n",
    "        dataset_name: dataset identifier, used in the figure title and in the output file name.\n",
    "        out_dir: directory receiving the PDF; created if it does not exist yet.\n",
    "\n",
    "    Returns:\n",
    "        The plotly figure, so that the caller decides when to display it.\n",
    "    \"\"\"\n",
    "    # Make sure the target directory is there before exporting, so the export\n",
    "    # does not fail on a fresh environment. No-op when it already exists.\n",
    "    os.makedirs(out_dir, exist_ok=True)\n",
    "\n",
    "    fig = px.line(df, x=\"box_size\", y=metric, text=metric, title=f\"{label} per bounding box size for {dataset_name}\")\n",
    "    # Keep the value labels off the markers.\n",
    "    fig.update_traces(textposition=\"bottom right\")\n",
    "    fig.update_xaxes(title=\"Bounding box size\")\n",
    "    fig.update_yaxes(title=label)\n",
    "\n",
    "    fig.write_image(os.path.join(out_dir, f\"results_{metric}_face_size_{dataset_name}.pdf\"))\n",
    "    return fig\n",
    "\n",
    "\n",
    "# Both metrics share the same figure layout: (column of df_res, display label).\n",
    "# df_res and dataset_orig are expected to come from the previous cells.\n",
    "for metric, label in [(\"map\", \"mAP\"), (\"f1\", \"F-score\")]:\n",
    "    fig = plot_metric_per_box_size(df_res, metric, label, dataset_orig.name)\n",
    "    fig.show()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "authorship_tag": "ABX9TyPsGpymaZzHvbb1BuZk0TZY",
   "gpuType": "T4",
   "include_colab_link": true,
   "provenance": []
  },
  "gpuClass": "standard",
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
